library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(patchwork)
library(bench)Rolling functions with missing values
Required packages
Needed to visualize and simplify showing the results.
Cumulative sum (cumsum())
This function returns the cumulative sum of the elements of a vector.
[[cpp11::register]] doubles cumsum2_cpp_(doubles x, bool na_rm = false) {
int n = x.size();
writable::doubles out(n);
out[0] = x[0];
if (na_rm == true) {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y2)) {
out[i] = y1 + 0.0;
} else {
if (ISNAN(y1)) {
out[i] = 0.0 + y2;
} else {
out[i] = y1 + y2;
}
}
}
} else {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y2)) {
out[i] = NA_REAL;
} else {
if (ISNAN(y1)) {
out[i] = NA_REAL;
} else {
out[i] = y1 + y2;
}
}
}
}
return out;
}I also need to add the corresponding auxiliary function for the documentation.
#' Return the cumulative sum of the coordinates of a vector (C++)
#' @param x numeric vector
#' @param na_rm logical. Should missing values (including `NaN`) be removed?
#' @export
cumsum2_cpp <- function(x, na_rm = FALSE) {
cumsum2_cpp_(as.double(x), na_rm = na_rm)
}A benchmark of the two functions is the following.
set.seed(123) # for reproducibility
x <- rpois(1e6, lambda = 2) # 1,000,000 elements
cumsum(c(1, NA, 2, 4))[1] 1 NA NA NA
cumsum2_cpp(c(1, NA, 2, 4))[1] 1 NA NA NA
cumsum2_cpp(c(1, NA, 2, 4), na_rm = TRUE)[1] 1 1 3 7
mark(
cumsum(x),
cumsum2_cpp(x)
)# A tibble: 2 × 6
expression min median `itr/sec` mem_alloc `gc/sec`
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
1 cumsum(x) 1.83ms 1.89ms 513. 3.81MB 73.2
2 cumsum2_cpp(x) 30.93ms 35.35ms 28.3 15.26MB 33.0
Cumulative product (cumprod())
The C++ implementation is similar to the previous part.
[[cpp11::register]] doubles cumprod2_cpp_(doubles x, bool na_rm = false) {
int n = x.size();
writable::doubles out(n);
out[0] = x[0];
if (na_rm == true) {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y2)) {
out[i] = y1 * 1.0;
} else {
if (ISNAN(y1)) {
out[i] = 1.0 * y2;
} else {
out[i] = y1 * y2;
}
}
}
} else {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y2)) {
out[i] = NA_REAL;
} else {
if (ISNAN(y1)) {
out[i] = NA_REAL;
} else {
out[i] = y1 * y2;
}
}
}
}
return out;
}I need an auxiliary function to cast the input as double.
#' Return the cumulative product of the coordinates of a vector (C++)
#' @inheritParams cumsum_r
#' @export
cumprod2_cpp <- function(x, na_rm = FALSE) {
cumprod2_cpp_(as.double(x), na_rm = na_rm)
}Test correctness and Benchmark.
cumprod(c(1, NA, 2, 4))[1] 1 NA NA NA
cumprod2_cpp(c(1, NA, 2, 4))[1] 1 NA NA NA
cumprod2_cpp(c(1, NA, 2, 4), na_rm = TRUE)[1] 1 1 2 8
mark(
cumprod(x),
cumprod2_cpp(x)
)# A tibble: 2 × 6
expression min median `itr/sec` mem_alloc `gc/sec`
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
1 cumprod(x) 1.88ms 2.08ms 339. 15.3MB 418.
2 cumprod2_cpp(x) 30.61ms 31.59ms 31.2 15.3MB 18.7
Cumulative minimum (cummin())
The C++ implementation is similar to the previous part.
[[cpp11::register]] doubles cummin2_cpp_(doubles x, bool na_rm = false) {
int n = x.size();
writable::doubles out(n);
out[0] = x[0];
if (na_rm == true) {
for (int i = 1; i < n; ++i) {
double y1 = x[i - 1], y2 = x[i];
if (ISNAN(y1)) {
out[i] = y2;
} else {
out[i] = std::min(y1, y2);
}
}
} else {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y2)) {
out[i] = NA_REAL;
} else {
if (ISNAN(y1)) {
out[i] = NA_REAL;
} else {
out[i] = std::min(y1, y2);
}
}
}
}
return out;
}I also have to add the corresponding auxiliary function for the documentation.
#' Return the cumulative minimum of the coordinates of a vector (C++)
#' @inheritParams cumsum_r
#' @export
cummin2_cpp <- function(x, na_rm = FALSE) {
cummin2_cpp_(as.double(x), na_rm = na_rm)
}Test correctness and benchmark.
cummin(c(1, NA, 2, 4))[1] 1 NA NA NA
cummin2_cpp(c(1, NA, 2, 4))[1] 1 NA NA NA
cummin2_cpp(c(1, NA, 2, 4), na_rm = TRUE)[1] 1 1 1 1
mark(
cummin(x),
cummin2_cpp(x)
)# A tibble: 2 × 6
expression min median `itr/sec` mem_alloc `gc/sec`
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
1 cummin(x) 1.16ms 1.24ms 647. 3.81MB 67.9
2 cummin2_cpp(x) 32.16ms 34.32ms 29.2 15.26MB 43.8
Cumulative maximum (cummax())
The C++ implementation is similar to the previous part.
[[cpp11::register]] doubles cummax2_cpp_(doubles x, bool na_rm = false) {
int n = x.size();
writable::doubles out(n);
out[0] = x[0];
if (na_rm == true) {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y1)) {
out[i] = y2;
} else {
out[i] = std::max(y1, y2);
}
}
} else {
for (int i = 1; i < n; ++i) {
double y1 = out[i - 1], y2 = x[i];
if (ISNAN(y2)) {
out[i] = NA_REAL;
} else {
if (ISNAN(y1)) {
out[i] = NA_REAL;
} else {
out[i] = std::max(y1, y2);
}
}
}
}
return out;
}I also have to add the corresponding auxiliary function for the documentation.
#' Return the cumulative maximum of the coordinates of a vector (C++)
#' @inheritParams cumsum_r
#' @export
cummax2_cpp <- function(x, na_rm = FALSE) {
cummax2_cpp_(as.double(x), na_rm = na_rm)
}Test correctness and benchmark.
cummax(c(1, NA, 2, 4))[1] 1 NA NA NA
cummax2_cpp(c(1, NA, 2, 4))[1] 1 NA NA NA
cummax2_cpp(c(1, NA, 2, 4), na_rm = TRUE)[1] 1 1 2 4
mark(
cummax(x),
cummax2_cpp(x)
)# A tibble: 2 × 6
expression min median `itr/sec` mem_alloc `gc/sec`
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
1 cummax(x) 1.06ms 1.13ms 868. 3.81MB 59.0
2 cummax2_cpp(x) 32.27ms 32.62ms 30.6 15.26MB 11.1